###
### Copyright 2009 The Chicago Independent Radio Project
### All Rights Reserved.
###
### Licensed under the Apache License, Version 2.0 (the "License");
### you may not use this file except in compliance with the License.
### You may obtain a copy of the License at
###
###     http://www.apache.org/licenses/LICENSE-2.0
###
### Unless required by applicable law or agreed to in writing, software
### distributed under the License is distributed on an "AS IS" BASIS,
### WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
### See the License for the specific language governing permissions and
### limitations under the License.
###

import re

from common.third_party.BeautifulSoup import BeautifulSoup

# Names of the HTML tags that are left in place by the sanitizer.
_VALID_TAGS = set([
    "a",
    "b",
    "br",
    "i",
    "p",
    "u",
])

# Names of the attributes that may remain on a tag; all others are removed.
_VALID_ATTRS = set([
    "href",
])


# URL schemes an href is allowed to carry.  Anything else (javascript:,
# vbscript:, data:, ...) causes the attribute to be dropped.
_SAFE_HREF_SCHEMES = frozenset(["http", "https", "mailto"])

# ASCII control characters, space and DEL.  They are removed before looking
# for a scheme so that padding cannot be used to disguise one.
_HREF_PADDING_RE = re.compile(r"[\x00-\x20\x7f]")

# First character that ends the part of a URL in which a scheme can appear.
_HREF_HEAD_END_RE = re.compile(r"[/?#]")


def _is_safe_href(value):
    """Return True if value may be kept as an href in sanitized output.

    Relative URLs (no scheme) and URLs whose scheme is listed in
    _SAFE_HREF_SCHEMES are accepted.  Everything else is rejected,
    including values where a character reference ("&...;") appears in the
    position where a scheme would be, because such a value cannot be
    classified without decoding it.
    """
    if not value:
        # None or an empty string: there is nothing that could execute.
        return True
    compact = _HREF_PADDING_RE.sub("", value).lower()
    # Only the text before the first "/", "?" or "#" can hold a scheme.
    boundary = _HREF_HEAD_END_RE.search(compact)
    head = compact[:boundary.start()] if boundary else compact
    if ":" not in head and "&" not in head:
        return True  # Plain relative reference.
    scheme, colon = head.partition(":")[:2]
    return bool(colon) and scheme in _SAFE_HREF_SCHEMES


def sanitize_html(text):
    """Reduce an HTML fragment to the whitelisted tags and attributes.

    Every tag whose name is not in _VALID_TAGS is marked hidden, and every
    attribute whose name is not in _VALID_ATTRS is removed.  An href is
    additionally removed when its URL scheme is not an allowed one, so
    that links such as "javascript:..." do not survive sanitizing.

    Args:
      text: The HTML markup to sanitize.

    Returns:
      The rendered contents of the parsed document, decoded from UTF-8.
    """
    soup = BeautifulSoup(text)
    for tag in soup.findAll(True):
        if tag.name not in _VALID_TAGS:
            # NOTE(review): hiding presumably suppresses only the tag's own
            # markup while its children are still rendered -- confirm that
            # text inside e.g. <script> cannot reintroduce live markup.
            tag.hidden = True
        tag.attrs = [(attr, val) for attr, val in tag.attrs
                     if attr in _VALID_ATTRS
                     and (attr != "href" or _is_safe_href(val))]
    return soup.renderContents().decode('utf8')
    

def valid_tags_description():
    """Describe the permitted HTML tags in human-readable form.

    Returns:
      A string with each name in _VALID_TAGS wrapped in angle brackets,
      sorted, and separated by single spaces (for example "<a> <b>").
    """
    bracketed = ["<%s>" % name for name in _VALID_TAGS]
    bracketed.sort()
    return " ".join(bracketed)
